La compagnie d’assurance canadienne AssurancExpertsInc cherche, comme tous ses concurrents, à cibler les meilleurs profils de clients pour les différentes catégories d’assurances dans le cadre de ses stratégies de marketing. C’est dans ce cadre que s’inscrit notre étude sur l’analyse des données relatives aux clients de la compagnie, afin d’identifier les meilleurs profils de clients à cibler lors des campagnes marketing pour une police d’assurance sur caravane. # Compréhension métier ## Problématique : Les campagnes publicitaires par e-mails, bien qu’elles soient efficaces avec les clients intéressés, peuvent créer un désagrément chez les clients non intéressés. Cette stratégie devient dans ce cas contre-productive et peut nuire à l’assurance en termes de clientèle et de gaspillage des ressources. ## Objectifs : - Cibler les clients pour une campagne marketing. - Cibler l’envoi des mails personnalisés. - Pousser les profils repérés à demander l’assurance caravane. ## Data Science goals : - Extraire les profils des clients potentiellement intéressés par une assurance caravane. - Établir une connaissance a priori sur les futurs clients. # Compréhension des données ## Chargement des librairies
library(DMwR)
library(adabag)
library(ggplot2)
library(corrplot)
library(readxl)
library(caret)
library(resample)
library(randomForest)
library(mlbench)
library(rpart.plot)
library(rpart)
library(smotefamily)
library(ROCR)
library(car)
library(MASS)
library(tfestimators)
library(RODBC)
library(plotly)
library(e1071)
# Load the tab-separated insurance dataset chosen interactively by the user.
# Spell out header = TRUE (never the reassignable shorthand T); empty strings
# are read as missing values (na.strings = "").
dataAssurance <- read.table(file.choose(), sep = "\t", dec = ".",
                            na.strings = "", header = TRUE)
# Keep an untouched copy of the raw data for later reference.
data_assurance <- dataAssurance
Visualisation de notre dataset
# Dataset dimensions: 9822 customers x 87 columns (85 predictors + CLASS + STATUS).
dim(dataAssurance)
## [1] 9822 87
# Peek at the first five rows to sanity-check the import.
head(dataAssurance,n = 5)
## SD1 SD2 SD3 SD4 SD5 SD6 SD7 SD8 SD9 SD10 SD11 SD12 SD13 SD14 SD15 SD16
## 1 33 1 3 2 8 0 5 1 3 7 0 2 1 2 6 1
## 2 37 1 2 2 8 1 4 1 4 6 2 2 0 4 5 0
## 3 37 1 2 2 8 0 4 2 4 3 2 4 4 4 2 0
## 4 9 1 3 3 3 2 3 2 4 5 2 2 2 3 4 3
## 5 40 1 4 2 10 1 4 1 4 7 1 2 2 4 4 5
## SD17 SD18 SD19 SD20 SD21 SD22 SD23 SD24 SD25 SD26 SD27 SD28 SD29 SD30
## 1 2 7 1 0 1 2 5 2 1 1 2 6 1 1
## 2 5 4 0 0 0 5 0 4 0 2 3 5 0 2
## 3 5 4 0 0 0 7 0 2 0 5 0 4 0 7
## 4 4 2 4 0 0 3 1 2 3 2 1 4 0 5
## 5 4 0 0 5 4 0 0 0 9 0 0 0 0 4
## SD31 SD32 SD33 SD34 SD35 SD36 SD37 SD38 SD39 SD40 SD41 SD42 SD43 PO44
## 1 8 8 0 1 8 1 0 4 5 0 0 4 3 0
## 2 7 7 1 2 6 3 2 0 5 2 0 5 4 2
## 3 2 7 0 2 9 0 4 5 0 0 0 3 4 2
## 4 4 9 0 0 7 2 1 5 3 0 0 4 4 0
## 5 5 6 2 1 5 4 0 0 9 0 0 6 3 0
## PO45 PO46 PO47 PO48 PO49 PO50 PO51 PO52 PO53 PO54 PO55 PO56 PO57 PO58
## 1 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## PO59 PO60 PO61 PO62 PO63 PO64 PO65 PO66 PO67 PO68 PO69 PO70 PO71 PO72
## 1 5 0 0 0 0 0 0 0 0 1 0 0 0 0
## 2 2 0 0 0 0 0 2 0 0 0 0 0 0 0
## 3 2 0 0 0 0 0 1 0 0 1 0 0 0 0
## 4 2 0 0 0 0 0 0 0 0 1 0 0 0 0
## 5 6 0 0 0 0 0 0 0 0 0 0 0 0 0
## PO73 PO74 PO75 PO76 PO77 PO78 PO79 PO80 PO81 PO82 PO83 PO84 PO85 CLASS
## 1 0 0 0 0 0 0 0 1 0 0 0 0 0 No
## 2 0 0 0 0 0 0 0 1 0 0 0 0 0 No
## 3 0 0 0 0 0 0 0 1 0 0 0 0 0 No
## 4 0 0 0 0 0 0 0 1 0 0 0 0 0 No
## 5 0 0 0 0 0 0 0 1 0 0 0 0 0 No
## STATUS
## 1 Learning
## 2 Learning
## 3 Learning
## 4 Learning
## 5 Learning
# Per-column five-number summaries; also reveals the CLASS imbalance (9236 No / 586 Yes).
summary(dataAssurance)
## SD1 SD2 SD3 SD4
## Min. : 1.00 Min. : 1.000 Min. :1.000 Min. :1.000
## 1st Qu.:10.00 1st Qu.: 1.000 1st Qu.:2.000 1st Qu.:2.000
## Median :30.00 Median : 1.000 Median :3.000 Median :3.000
## Mean :24.25 Mean : 1.109 Mean :2.678 Mean :2.996
## 3rd Qu.:35.00 3rd Qu.: 1.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :41.00 Max. :10.000 Max. :6.000 Max. :6.000
## SD5 SD6 SD7 SD8
## Min. : 1.000 Min. :0.0000 Min. :0.000 Min. :0.00
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:4.000 1st Qu.:0.00
## Median : 7.000 Median :0.0000 Median :5.000 Median :1.00
## Mean : 5.779 Mean :0.7007 Mean :4.638 Mean :1.05
## 3rd Qu.: 8.000 3rd Qu.:1.0000 3rd Qu.:6.000 3rd Qu.:2.00
## Max. :10.000 Max. :9.0000 Max. :9.000 Max. :5.00
## SD9 SD10 SD11 SD12
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:2.000 1st Qu.:5.000 1st Qu.:0.0000 1st Qu.:1.000
## Median :3.000 Median :6.000 Median :1.0000 Median :2.000
## Mean :3.263 Mean :6.189 Mean :0.8731 Mean :2.287
## 3rd Qu.:4.000 3rd Qu.:7.000 3rd Qu.:1.0000 3rd Qu.:3.000
## Max. :9.000 Max. :9.000 Max. :7.0000 Max. :9.000
## SD13 SD14 SD15 SD16
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:0.000
## Median :2.000 Median :3.000 Median :4.000 Median :1.000
## Mean :1.887 Mean :3.237 Mean :4.303 Mean :1.485
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:6.000 3rd Qu.:2.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## SD17 SD18 SD19 SD20
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :3.000 Median :5.000 Median :2.000 Median :0.0000
## Mean :3.307 Mean :4.592 Mean :1.899 Mean :0.4033
## 3rd Qu.:4.000 3rd Qu.:6.000 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :5.0000
## SD21 SD22 SD23 SD24
## Min. :0.0000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000
## Median :0.0000 Median :3.000 Median :2.000 Median :2.000
## Mean :0.5457 Mean :2.877 Mean :2.227 Mean :2.291
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :9.0000 Max. :9.000 Max. :9.000 Max. :9.000
## SD25 SD26 SD27 SD28
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000
## Median :1.000 Median :2.000 Median :2.000 Median :4.000
## Mean :1.651 Mean :1.595 Mean :2.205 Mean :3.742
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:5.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## SD29 SD30 SD31 SD32
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:5.000
## Median :1.000 Median :4.000 Median :5.000 Median :6.000
## Mean :1.068 Mean :4.188 Mean :4.819 Mean :6.023
## 3rd Qu.:2.000 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:7.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## SD33 SD34 SD35 SD36
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:5.000 1st Qu.:1.000
## Median :1.000 Median :2.000 Median :7.000 Median :2.000
## Mean :1.336 Mean :1.957 Mean :6.254 Mean :2.751
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:8.000 3rd Qu.:4.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## SD37 SD38 SD39 SD40
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:0.0000
## Median :2.000 Median :4.000 Median :3.000 Median :0.0000
## Mean :2.577 Mean :3.505 Mean :2.739 Mean :0.8085
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:1.0000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.0000
## SD41 SD42 SD43 PO44
## Min. :0.000 Min. :0.000 Min. :1.00 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:3.000 1st Qu.:3.00 1st Qu.:0.0000
## Median :0.000 Median :4.000 Median :4.00 Median :0.0000
## Mean :0.208 Mean :3.805 Mean :4.26 Mean :0.7649
## 3rd Qu.:0.000 3rd Qu.:4.000 3rd Qu.:6.00 3rd Qu.:2.0000
## Max. :9.000 Max. :9.000 Max. :8.00 Max. :3.0000
## PO45 PO46 PO47 PO48
## Min. :0.00000 Min. :0.00000 Min. :0.000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :5.000 Median :0.00000
## Mean :0.03889 Mean :0.07371 Mean :2.956 Mean :0.05488
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:6.000 3rd Qu.:0.00000
## Max. :6.00000 Max. :4.00000 Max. :9.000 Max. :7.00000
## PO49 PO50 PO51 PO52
## Min. :0.0000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.1708 Mean :0.008858 Mean :0.01934 Mean :0.09356
## 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :7.0000 Max. :9.000000 Max. :5.00000 Max. :7.00000
## PO53 PO54 PO55 PO56
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.0115 Mean :0.215 Mean :0.2023 Mean :0.0115
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :6.0000 Max. :6.000 Max. :9.0000 Max. :6.0000
## PO57 PO58 PO59 PO60
## Min. :0.00000 Min. :0.00000 Min. :0.000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.000000
## Median :0.00000 Median :0.00000 Median :2.000 Median :0.000000
## Mean :0.01873 Mean :0.02331 Mean :1.849 Mean :0.001629
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:4.000 3rd Qu.:0.000000
## Max. :3.00000 Max. :7.00000 Max. :8.000 Max. :3.000000
## PO61 PO62 PO63 PO64
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.01527 Mean :0.02535 Mean :0.0167 Mean :0.04541
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :6.00000 Max. :1.00000 Max. :6.0000 Max. :5.00000
## PO65 PO66 PO67 PO68
## Min. :0.0 Min. :0.00000 Min. :0.00000 Min. : 0.0000
## 1st Qu.:0.0 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.: 0.0000
## Median :0.0 Median :0.00000 Median :0.00000 Median : 1.0000
## Mean :0.4 Mean :0.01405 Mean :0.02128 Mean : 0.5572
## 3rd Qu.:1.0 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.: 1.0000
## Max. :2.0 Max. :5.00000 Max. :1.00000 Max. :12.0000
## PO69 PO70 PO71 PO72
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.0111 Mean :0.04022 Mean :0.00224 Mean :0.0114
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :5.0000 Max. :8.00000 Max. :4.00000 Max. :3.0000
## PO73 PO74 PO75 PO76
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.00000
## Mean :0.03441 Mean :0.005192 Mean :0.07107 Mean :0.07982
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :6.00000 Max. :6.000000 Max. :3.00000 Max. :8.00000
## PO77 PO78 PO79 PO80
## Min. :0.000000 Min. :0.000000 Min. :0.000000 Min. :0.000
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.000
## Median :0.000000 Median :0.000000 Median :0.000000 Median :1.000
## Mean :0.004582 Mean :0.007941 Mean :0.004276 Mean :0.574
## 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:1.000
## Max. :1.000000 Max. :1.000000 Max. :2.000000 Max. :7.000
## PO81 PO82 PO83
## Min. :0.0000000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.0000000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.0000000 Median :0.000000 Median :0.00000
## Mean :0.0009163 Mean :0.005091 Mean :0.03146
## 3rd Qu.:0.0000000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :1.0000000 Max. :2.000000 Max. :4.00000
## PO84 PO85 CLASS STATUS
## Min. :0.00000 Min. :0.00000 No :9236 Learning:5822
## 1st Qu.:0.00000 1st Qu.:0.00000 Yes: 586 Test :4000
## Median :0.00000 Median :0.00000
## Mean :0.00845 Mean :0.01385
## 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :2.00000 Max. :2.00000
# Column types: 85 integer predictors plus the CLASS and STATUS factors.
str(dataAssurance)
## 'data.frame': 9822 obs. of 87 variables:
## $ SD1 : int 33 37 37 9 40 23 39 33 33 11 ...
## $ SD2 : int 1 1 1 1 1 1 2 1 1 2 ...
## $ SD3 : int 3 2 2 3 4 2 3 2 2 3 ...
## $ SD4 : int 2 2 2 3 2 1 2 3 4 3 ...
## $ SD5 : int 8 8 8 3 10 5 9 8 8 3 ...
## $ SD6 : int 0 1 0 2 1 0 2 0 0 3 ...
## $ SD7 : int 5 4 4 3 4 5 2 7 1 5 ...
## $ SD8 : int 1 1 2 2 1 0 0 0 3 0 ...
## $ SD9 : int 3 4 4 4 4 5 5 2 6 2 ...
## $ SD10 : int 7 6 3 5 7 0 7 7 6 7 ...
## $ SD11 : int 0 2 2 2 1 6 2 2 0 0 ...
## $ SD12 : int 2 2 4 2 2 3 0 0 3 2 ...
## $ SD13 : int 1 0 4 2 2 3 0 0 3 2 ...
## $ SD14 : int 2 4 4 3 4 5 3 5 3 2 ...
## $ SD15 : int 6 5 2 4 4 2 6 4 3 6 ...
## $ SD16 : int 1 0 0 3 5 0 0 0 0 0 ...
## $ SD17 : int 2 5 5 4 4 5 4 3 1 4 ...
## $ SD18 : int 7 4 4 2 0 4 5 6 8 5 ...
## $ SD19 : int 1 0 0 4 0 2 0 2 1 2 ...
## $ SD20 : int 0 0 0 0 5 0 0 0 1 0 ...
## $ SD21 : int 1 0 0 0 4 0 0 0 0 0 ...
## $ SD22 : int 2 5 7 3 0 4 4 2 1 3 ...
## $ SD23 : int 5 0 0 1 0 2 1 5 8 3 ...
## $ SD24 : int 2 4 2 2 0 2 5 2 1 3 ...
## $ SD25 : int 1 0 0 3 9 2 0 2 1 1 ...
## $ SD26 : int 1 2 5 2 0 2 1 1 1 2 ...
## $ SD27 : int 2 3 0 1 0 2 4 2 0 1 ...
## $ SD28 : int 6 5 4 4 0 4 5 5 8 4 ...
## $ SD29 : int 1 0 0 0 0 2 0 2 1 2 ...
## $ SD30 : int 1 2 7 5 4 9 6 0 9 0 ...
## $ SD31 : int 8 7 2 4 5 0 3 9 0 9 ...
## $ SD32 : int 8 7 7 9 6 5 8 4 5 6 ...
## $ SD33 : int 0 1 0 0 2 3 0 4 2 1 ...
## $ SD34 : int 1 2 2 0 1 3 1 2 3 2 ...
## $ SD35 : int 8 6 9 7 5 9 9 6 7 6 ...
## $ SD36 : int 1 3 0 2 4 0 0 3 2 3 ...
## $ SD37 : int 0 2 4 1 0 5 4 2 7 2 ...
## $ SD38 : int 4 0 5 5 0 2 3 5 2 3 ...
## $ SD39 : int 5 5 0 3 9 3 3 3 1 3 ...
## $ SD40 : int 0 2 0 0 0 0 0 0 0 1 ...
## $ SD41 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ SD42 : int 4 5 3 4 6 3 3 3 2 4 ...
## $ SD43 : int 3 4 4 4 3 3 5 3 3 7 ...
## $ PO44 : int 0 2 2 0 0 0 0 0 0 2 ...
## $ PO45 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO46 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO47 : int 6 0 6 6 0 6 6 0 5 0 ...
## $ PO48 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO49 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO50 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO51 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO52 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO53 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO54 : int 0 0 0 0 0 0 0 3 0 0 ...
## $ PO55 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO56 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO57 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO58 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO59 : int 5 2 2 2 6 0 0 0 0 3 ...
## $ PO60 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO61 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO62 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO63 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO64 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO65 : int 0 2 1 0 0 0 0 0 0 1 ...
## $ PO66 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO67 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO68 : int 1 0 1 1 0 1 1 0 1 0 ...
## $ PO69 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO70 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO71 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO72 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO73 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO74 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO75 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ PO76 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO77 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO78 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO79 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO80 : int 1 1 1 1 1 0 0 0 0 1 ...
## $ PO81 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO82 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO83 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO84 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PO85 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CLASS : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ STATUS: Factor w/ 2 levels "Learning","Test": 1 1 1 1 1 1 1 1 1 1 ...
Visualisation des corrélations entre les variables sociodémographiques
# Correlation matrix of the 43 socio-demographic variables (columns 1-43),
# drawn as ellipses (narrower ellipse = stronger correlation).
dd <- dataAssurance[,1:43]
cc <- cor(dd)
corrplot(t(cc), method="ellipse")
Visualisation des corrélations entre les variables de possession de produits
# Correlation matrix of the 42 product-ownership variables (columns 44-85),
# same ellipse rendering as for the socio-demographic block above.
dd <- dataAssurance[,44:85]
cc <- cor(dd)
corrplot(t(cc), method="ellipse")
Le nombre de clients qui désirent ou non avoir une assurance sur caravane
# Class balance of the target: counts of No vs Yes answers to the caravan offer.
summary(dataAssurance$CLASS)
## No Yes
## 9236 586
# plot() on a factor draws a bar chart of the class counts.
plot(dataAssurance$CLASS)
On remarque que le nombre des clients qui souhaitent de leur plein gré avoir une assurance caravane est très inférieur à celui des clients qui ne souhaitent pas avoir ce type d’assurance.
# Quick spread/outlier scan of every numeric column at once.
boxplot(x = dataAssurance)
# Histogram of customer subtype (SD1) over the full dataset.
# Map the column by name inside aes() — aes(dataAssurance$SD1) bypasses
# ggplot's data masking and breaks under faceting/grouping.
ggplot(data = dataAssurance, aes(x = SD1)) +
  geom_histogram(breaks = seq(1, 41, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "green", high = "red") +
  ggtitle("Histogram of Type") +
  labs(x = "Class", y = "Count")
Lower class large families domine notre échantillon
# Isolate subtype 33 ("Lower class large families"), the dominant subtype,
# and show how its members answered the caravan question as a pie chart
# (a stacked bar wrapped onto polar coordinates).
a <- dataAssurance[which(dataAssurance$SD1 == 33), ]
df <- data.frame(group = a$CLASS, value = a$SD1)
ggplot(df, aes(x = factor(1), fill = group)) +
  geom_bar(width = 1) +
  coord_polar("y")
Majoritairement, les individus de type “Lower class large” ont répondu non à l’assurance caravane.
Subdiviser le dataset pour récupérer les individus qui ont répondu OUI
# Keep only the customers who answered "Yes" to the caravan-policy offer,
# then inspect the first fifty of them.
y <- subset(dataAssurance, CLASS == "Yes")
head(y, n = 50)
## SD1 SD2 SD3 SD4 SD5 SD6 SD7 SD8 SD9 SD10 SD11 SD12 SD13 SD14 SD15 SD16
## 42 11 1 3 3 3 2 7 0 0 9 0 0 2 3 4 0
## 46 38 1 3 3 9 0 5 1 3 7 1 2 3 2 5 1
## 58 12 1 3 2 3 0 6 0 3 7 2 0 1 2 6 2
## 98 9 1 2 3 3 0 6 1 2 7 1 1 5 1 4 5
## 99 36 1 2 4 8 2 4 2 2 7 0 2 2 4 3 1
## 128 3 1 2 4 1 0 4 4 2 6 0 3 3 4 3 7
## 150 8 2 3 3 2 0 5 2 3 7 1 2 2 3 5 3
## 151 8 1 4 3 2 1 5 1 3 7 1 2 0 3 6 3
## 175 34 1 3 2 8 0 7 1 2 8 1 0 0 4 5 0
## 180 41 1 3 3 10 0 7 0 3 8 1 1 1 3 5 1
## 195 36 1 3 3 8 0 7 0 2 7 0 2 0 4 5 0
## 204 38 1 3 2 9 1 0 1 7 4 1 4 1 1 8 4
## 227 10 1 3 3 3 0 9 0 0 7 0 2 2 3 5 5
## 235 38 1 3 2 9 0 6 0 3 5 2 2 2 2 5 0
## 236 8 1 3 3 2 1 7 0 1 6 0 3 2 4 4 4
## 247 1 1 4 3 1 1 5 1 3 6 1 2 0 2 7 8
## 249 33 1 2 3 8 0 7 0 2 7 2 0 0 5 4 0
## 254 38 1 3 2 9 2 2 2 4 7 0 2 1 1 8 3
## 282 8 1 2 4 2 0 7 0 2 9 0 0 2 5 3 0
## 298 8 1 3 3 2 1 5 1 3 7 1 2 0 2 7 5
## 314 13 1 3 3 3 0 6 0 3 6 0 3 2 4 4 4
## 319 35 1 2 3 8 1 5 1 3 5 1 4 3 3 4 2
## 336 12 1 4 2 3 0 4 0 5 9 0 0 0 2 7 0
## 337 38 1 4 2 9 0 2 3 5 6 0 3 2 0 7 0
## 357 8 2 4 3 2 0 5 0 5 9 0 0 0 0 9 0
## 401 31 1 3 3 7 2 3 0 5 4 2 4 3 2 5 0
## 404 34 1 3 2 8 1 5 1 4 6 1 3 2 1 6 3
## 427 38 2 3 2 9 0 6 0 3 6 2 2 1 4 5 1
## 440 6 1 3 3 2 0 7 2 0 9 0 0 0 3 6 6
## 445 33 1 3 3 8 1 4 1 5 7 1 2 1 4 4 2
## 481 32 1 2 3 7 0 5 1 3 6 0 3 3 3 3 0
## 505 12 1 3 2 3 1 3 0 5 7 1 2 0 3 6 2
## 511 8 1 3 3 2 2 5 2 1 7 2 0 0 2 7 4
## 553 3 1 1 2 1 0 5 0 4 0 3 6 6 3 0 3
## 561 8 1 3 3 2 2 3 0 5 7 2 0 0 5 4 1
## 621 39 1 2 2 9 0 3 2 5 5 3 2 1 6 2 1
## 627 33 1 3 3 8 1 4 1 5 7 1 2 1 4 4 2
## 662 8 1 3 3 2 1 5 1 3 7 1 2 0 2 7 5
## 670 38 1 2 2 9 0 4 2 4 7 1 2 3 4 3 1
## 671 24 1 2 2 5 0 4 0 5 3 2 4 4 3 2 1
## 685 8 1 3 3 2 1 6 2 1 7 0 2 1 2 6 4
## 705 33 1 2 3 8 1 4 1 4 6 2 3 3 4 3 1
## 763 32 1 3 3 7 0 6 0 3 7 0 2 0 5 4 2
## 771 33 1 2 3 8 1 4 1 4 6 2 3 3 4 3 1
## 774 38 1 3 2 9 0 4 0 5 9 0 0 0 2 7 1
## 800 1 1 2 4 1 0 2 0 7 6 0 3 5 3 2 3
## 812 3 1 2 3 1 1 5 4 0 9 0 0 1 6 3 4
## 816 8 1 3 3 2 1 7 0 1 6 0 3 2 4 4 4
## 835 12 2 4 2 3 0 2 0 7 9 0 0 0 0 9 0
## 838 3 1 2 3 1 1 5 1 3 6 1 2 0 5 4 4
## SD17 SD18 SD19 SD20 SD21 SD22 SD23 SD24 SD25 SD26 SD27 SD28 SD29 SD30
## 42 5 4 5 0 0 4 0 1 1 2 4 2 2 1
## 46 3 6 2 1 1 2 3 3 1 1 1 5 2 5
## 58 4 4 1 0 0 5 2 1 0 3 2 4 0 0
## 98 4 0 4 0 0 5 1 0 1 8 0 0 0 9
## 99 5 4 1 0 0 3 1 4 1 1 4 4 0 2
## 128 2 0 7 0 0 2 0 1 7 2 0 0 0 0
## 150 4 3 4 0 0 4 1 0 4 2 2 2 0 1
## 151 5 1 3 1 0 4 1 2 3 3 2 2 1 1
## 175 4 5 0 0 0 5 1 4 0 1 5 4 0 4
## 180 4 5 1 1 3 2 3 1 2 1 3 4 0 2
## 195 0 9 0 0 0 2 7 0 0 0 0 9 0 9
## 204 4 2 4 1 0 2 2 3 2 1 5 2 0 6
## 227 2 3 3 0 0 3 2 1 2 3 3 2 0 0
## 235 2 7 0 0 0 2 7 0 0 0 0 9 0 2
## 236 3 2 3 0 0 5 1 1 3 2 3 2 0 0
## 247 1 1 6 0 0 2 2 1 5 3 2 1 0 0
## 249 3 6 2 0 0 2 5 2 2 1 2 5 2 0
## 254 2 5 1 0 0 5 1 3 1 1 4 3 2 4
## 282 4 5 0 0 0 4 1 5 0 0 5 4 1 9
## 298 3 2 3 1 0 4 2 1 4 3 2 2 1 1
## 314 3 2 7 0 0 0 2 0 7 0 0 2 0 0
## 319 2 6 1 1 1 2 4 2 1 1 2 5 1 4
## 336 5 4 2 0 0 2 3 3 1 2 2 4 1 1
## 337 6 3 0 0 0 7 0 2 0 3 4 3 0 4
## 357 5 5 0 0 0 9 0 0 0 4 0 5 0 0
## 401 4 5 0 0 0 2 2 5 0 2 3 3 2 9
## 404 3 4 2 0 0 5 1 2 2 2 3 4 0 1
## 427 6 3 1 0 0 3 3 3 1 3 2 5 0 4
## 440 3 0 6 2 0 2 0 0 7 2 0 0 0 0
## 445 3 4 3 0 2 2 3 1 3 1 3 3 2 3
## 481 1 8 0 0 0 4 0 5 0 1 1 5 3 9
## 505 4 3 3 0 0 3 0 3 3 1 4 3 0 4
## 511 4 2 3 0 0 5 2 1 2 3 3 2 1 0
## 553 3 3 0 0 0 6 0 3 0 4 2 2 2 7
## 561 8 1 3 0 0 4 1 3 1 2 4 4 0 5
## 621 4 4 1 1 1 6 1 2 1 2 2 5 0 8
## 627 3 4 3 0 2 2 3 1 3 1 3 3 2 3
## 662 3 2 3 1 0 4 2 1 4 3 2 2 1 1
## 670 1 8 1 0 0 3 6 0 1 1 1 8 0 3
## 671 6 2 0 0 0 4 2 3 0 2 3 5 0 7
## 685 3 2 5 1 0 2 1 1 5 2 1 2 0 2
## 705 4 5 1 1 0 3 2 4 1 2 2 5 1 5
## 763 4 4 2 2 0 3 3 2 3 2 0 4 0 2
## 771 4 5 1 1 0 3 2 4 1 2 2 5 1 5
## 774 2 6 1 0 0 3 3 3 1 1 3 6 1 5
## 800 4 2 7 0 0 0 2 0 4 0 4 1 0 0
## 812 3 2 6 0 0 2 2 0 5 2 3 1 0 0
## 816 3 2 3 0 0 5 1 1 3 2 3 2 0 0
## 835 3 6 0 0 0 7 0 2 0 2 0 6 2 0
## 838 0 5 2 1 1 2 3 2 3 0 6 0 0 0
## SD31 SD32 SD33 SD34 SD35 SD36 SD37 SD38 SD39 SD40 SD41 SD42 SD43 PO44
## 42 8 7 2 0 7 2 2 4 3 0 1 4 6 2
## 46 4 5 2 2 7 2 4 4 1 1 1 3 4 2
## 58 9 9 0 0 5 4 2 4 4 0 0 4 7 2
## 98 0 8 1 1 4 5 0 5 4 0 0 5 4 0
## 99 7 5 3 2 5 4 2 5 3 0 0 4 3 0
## 128 9 5 4 0 3 6 0 3 6 0 0 4 6 2
## 150 8 8 0 1 6 3 0 5 4 0 0 4 7 0
## 151 8 7 0 2 5 4 1 3 5 1 1 5 7 0
## 175 5 8 1 0 9 0 2 5 2 2 0 4 6 0
## 180 7 5 4 0 6 3 3 4 4 0 0 4 4 0
## 195 0 7 0 2 9 0 3 4 2 2 0 4 3 0
## 204 3 6 0 3 6 3 3 2 4 1 0 4 5 0
## 227 9 9 0 0 3 6 2 0 4 4 0 7 8 2
## 235 7 9 0 0 7 2 0 9 0 0 0 3 4 0
## 236 9 6 3 2 5 4 1 4 4 1 0 5 7 2
## 247 9 7 2 1 5 4 0 1 4 5 1 7 8 2
## 249 9 4 4 2 6 3 2 5 3 0 0 3 3 0
## 254 5 7 2 0 7 2 4 4 2 1 0 4 4 0
## 282 0 7 0 2 5 4 4 4 2 0 0 3 7 0
## 298 8 7 0 2 5 4 1 2 4 3 1 6 7 2
## 314 9 9 0 0 4 5 0 5 4 0 0 4 6 0
## 319 5 6 1 2 7 2 3 5 2 0 0 3 5 3
## 336 8 5 2 2 8 1 2 1 4 2 0 5 8 0
## 337 5 9 0 0 7 2 0 7 0 2 0 4 4 2
## 357 9 6 3 0 2 7 0 9 0 0 0 3 7 2
## 401 0 3 0 6 9 0 7 2 0 0 0 2 1 1
## 404 8 7 2 0 7 2 1 4 4 1 0 4 6 2
## 427 5 8 0 1 8 1 2 3 4 0 0 4 4 2
## 440 9 7 2 0 2 7 0 0 7 2 0 5 8 0
## 445 6 7 2 1 5 4 3 2 3 2 0 4 3 2
## 481 0 7 0 2 9 0 6 2 1 0 0 2 1 0
## 505 5 7 2 0 5 4 2 4 3 0 0 4 7 0
## 511 9 9 0 0 4 5 1 2 7 0 0 5 7 2
## 553 2 5 0 4 7 2 5 4 0 0 0 2 6 0
## 561 4 9 0 0 6 3 2 1 4 3 0 5 7 2
## 621 1 5 1 4 8 1 6 3 1 1 0 2 5 2
## 627 6 7 2 1 5 4 3 2 3 2 0 4 3 0
## 662 8 7 0 2 5 4 1 2 4 3 1 6 7 2
## 670 6 7 1 1 8 1 0 7 2 0 0 4 4 0
## 671 2 7 0 2 9 0 3 4 2 1 0 4 2 2
## 685 7 6 3 1 4 5 1 2 6 2 0 6 7 2
## 705 4 6 1 3 7 2 3 4 3 1 0 3 3 2
## 763 7 7 2 0 2 7 0 5 4 0 0 4 1 2
## 771 4 6 1 3 7 2 3 4 3 1 0 3 3 2
## 774 4 6 1 2 7 2 1 4 4 1 0 4 4 2
## 800 9 9 0 0 2 7 2 2 5 0 0 4 8 2
## 812 9 5 4 0 0 9 0 0 8 1 0 5 6 2
## 816 9 6 3 2 5 4 1 4 4 1 0 5 7 2
## 835 9 9 0 0 9 0 2 0 5 2 0 5 7 2
## 838 9 6 1 3 7 2 3 4 2 1 0 3 6 2
## PO45 PO46 PO47 PO48 PO49 PO50 PO51 PO52 PO53 PO54 PO55 PO56 PO57 PO58
## 42 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 46 0 0 6 0 5 0 0 0 0 0 0 0 0 0
## 58 0 0 0 0 0 0 0 0 0 0 0 0 0 6
## 98 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 99 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 128 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 150 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 151 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 175 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 180 0 3 6 0 0 0 0 0 0 0 0 0 0 0
## 195 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 204 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 227 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 235 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 236 0 0 6 0 0 0 2 0 0 0 0 0 0 0
## 247 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 249 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 254 3 0 6 0 0 0 0 5 0 0 0 0 0 6
## 282 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 298 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 314 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 319 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 336 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 337 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 357 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 401 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 404 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 427 0 0 6 0 0 0 0 0 0 0 5 0 0 0
## 440 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 445 0 0 6 0 0 0 0 0 0 0 0 0 3 0
## 481 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 505 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 511 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 553 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 561 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 621 0 0 0 0 4 0 0 0 0 0 0 0 0 0
## 627 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 662 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 670 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 671 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 685 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 705 0 0 6 0 5 0 0 0 0 0 3 0 0 0
## 763 0 0 0 0 0 0 2 0 0 0 0 0 0 0
## 771 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 774 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 800 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 812 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 816 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 835 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## 838 0 0 6 0 0 0 0 0 0 0 0 0 0 0
## PO59 PO60 PO61 PO62 PO63 PO64 PO65 PO66 PO67 PO68 PO69 PO70 PO71 PO72
## 42 4 0 0 1 0 0 1 0 0 1 0 0 0 0
## 46 3 0 0 0 0 0 1 0 0 1 0 1 0 0
## 58 4 0 0 0 0 0 1 0 0 0 0 0 0 0
## 98 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 99 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 128 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 150 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 151 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 175 3 0 0 0 0 0 0 0 0 0 0 0 0 0
## 180 6 0 1 0 0 0 0 0 1 1 0 0 0 0
## 195 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 204 3 0 0 0 0 0 0 0 0 1 0 0 0 0
## 227 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 235 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 236 0 0 0 0 0 0 1 0 0 1 0 0 0 1
## 247 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 249 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 254 0 0 0 0 0 0 0 1 0 2 0 0 0 0
## 282 3 0 0 0 0 4 0 0 0 1 0 0 0 0
## 298 4 0 0 0 0 0 1 0 0 0 0 0 0 0
## 314 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 319 4 0 0 0 0 0 1 0 0 2 0 0 0 0
## 336 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 337 4 0 0 0 0 3 1 0 0 1 0 0 0 0
## 357 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 401 3 0 0 0 0 0 1 0 0 1 0 0 0 0
## 404 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 427 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 440 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 445 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 481 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 505 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 511 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 553 3 0 0 0 0 0 0 0 0 1 0 0 0 0
## 561 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 621 4 0 0 0 0 0 1 0 0 0 0 1 0 0
## 627 0 0 4 0 0 0 0 0 0 1 0 0 0 0
## 662 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 670 0 0 2 0 0 0 0 0 0 1 0 0 0 0
## 671 3 0 0 0 0 0 1 0 0 1 0 0 0 0
## 685 4 0 0 0 0 0 1 0 0 2 0 0 0 0
## 705 4 0 0 0 0 0 1 0 0 2 0 2 0 0
## 763 4 1 6 0 1 0 1 0 0 0 0 0 0 1
## 771 3 0 0 0 0 0 1 0 0 1 0 0 0 0
## 774 3 0 0 0 0 0 1 0 0 1 0 0 0 0
## 800 3 0 0 0 0 0 1 0 0 0 0 0 0 0
## 812 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 816 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 835 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## 838 4 0 0 0 0 0 1 0 0 1 0 0 0 0
## PO73 PO74 PO75 PO76 PO77 PO78 PO79 PO80 PO81 PO82 PO83 PO84 PO85 CLASS
## 42 0 0 0 0 0 0 0 2 0 0 1 0 0 Yes
## 46 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 58 0 0 0 0 0 0 1 1 0 0 0 0 0 Yes
## 98 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 99 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 128 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 150 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 151 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 175 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 180 0 0 0 0 0 0 0 1 0 1 0 0 0 Yes
## 195 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 204 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 227 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 235 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 236 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 247 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 249 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 254 1 0 0 0 0 0 1 0 0 0 0 0 0 Yes
## 282 0 0 0 0 0 0 0 1 0 0 0 0 1 Yes
## 298 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 314 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 319 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 336 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 337 0 0 0 0 0 0 0 2 0 0 0 0 1 Yes
## 357 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 401 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 404 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 427 0 0 0 2 0 0 0 1 0 0 0 0 0 Yes
## 440 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 445 0 0 0 0 0 1 0 1 0 0 0 0 0 Yes
## 481 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 505 0 0 0 0 0 0 0 0 0 0 0 0 0 Yes
## 511 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 553 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 561 0 0 0 0 0 0 0 2 0 0 0 0 0 Yes
## 621 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 627 0 0 0 0 0 0 0 0 0 1 0 0 0 Yes
## 662 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 670 0 0 0 0 0 0 0 0 0 1 0 0 0 Yes
## 671 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 685 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 705 0 0 0 2 0 0 0 1 0 0 0 0 0 Yes
## 763 0 0 0 0 0 0 0 1 1 1 0 1 0 Yes
## 771 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 774 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 800 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 812 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 816 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 835 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## 838 0 0 0 0 0 0 0 1 0 0 0 0 0 Yes
## STATUS
## 42 Learning
## 46 Learning
## 58 Learning
## 98 Learning
## 99 Learning
## 128 Learning
## 150 Learning
## 151 Learning
## 175 Learning
## 180 Learning
## 195 Learning
## 204 Learning
## 227 Learning
## 235 Learning
## 236 Learning
## 247 Learning
## 249 Learning
## 254 Learning
## 282 Learning
## 298 Learning
## 314 Learning
## 319 Learning
## 336 Learning
## 337 Learning
## 357 Learning
## 401 Learning
## 404 Learning
## 427 Learning
## 440 Learning
## 445 Learning
## 481 Learning
## 505 Learning
## 511 Learning
## 553 Learning
## 561 Learning
## 621 Learning
## 627 Learning
## 662 Learning
## 670 Learning
## 671 Learning
## 685 Learning
## 705 Learning
## 763 Learning
## 771 Learning
## 774 Learning
## 800 Learning
## 812 Learning
## 816 Learning
## 835 Learning
## 838 Learning
# Age-range (SD4) distribution among the "Yes" respondents.
# Reference the column by name inside aes(); aes(y$SD4) bypasses
# ggplot's data masking and is fragile under faceting/grouping.
ggplot(data = y, aes(x = SD4)) +
  geom_histogram(breaks = seq(0, 6, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "green", high = "red") +
  ggtitle("Histogram of Type") +
  labs(x = "Age", y = "Count")
# Pie chart of the age-range (SD4) frequencies across all customers.
ncountType <- as.data.frame(table(dataAssurance$SD4))
DemoYesType <- data.frame(
  group = ncountType$Var1,
  value = ncountType$Freq
)
# plotly is already attached at the top of the script.
YesType <- DemoYesType[, c("group", "value")]
# RGB channels must lie in 0-255: the original 'rgb(145,253,429)' and
# 'rgb(175,105,877)' were out of range (invalid CSS colors) — clamped here.
colors <- c('rgb(215,94,96)', 'rgb(122,132,133)', 'rgb(145,253,229)',
            'rgb(175,105,177)', 'rgb(115,147,23)')
p <- plot_ly(DemoYesType, labels = ~group, values = ~value, type = 'pie',
             marker = list(colors = colors,
                           line = list(color = '#FFFFFF', width = 1))) %>%
  layout(title = 'Customer Age range')
p
Parmi les individus qui ont répondu OUI, la majorité ont entre 40 et 50 ans.
# Subtype (SD1) distribution among "Yes" respondents, green-to-red scale.
# Column mapped by name inside aes() — aes(y$SD1) bypasses data masking.
ggplot(data = y, aes(x = SD1)) +
  geom_histogram(breaks = seq(0, 41, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "green", high = "red") +
  ggtitle("Histogram of Type") +
  labs(x = "SubType", y = "Count")
# Same distribution with a blue scale for easier reading.
ggplot(data = y, aes(x = SD1)) +
  geom_histogram(breaks = seq(0, 41, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "lightblue", high = "blue") +
  ggtitle("Histogram of SubType") +
  labs(x = "SubType", y = "Count")
{
  # Count, for each of the 41 customer subtypes (SD1), how many customers
  # answered "Yes". Column 1 is SD1, column 86 is CLASS (the target).
  d1 <- dataAssurance[, c(1, 86)]
  # Build all 41 counts in one vectorized pass with vapply/seq_len instead
  # of growing a data.frame row-by-row inside a loop (quadratic copying).
  f <- data.frame(
    Freq = vapply(
      seq_len(41),
      function(subtype) sum(d1$SD1 == subtype & d1$CLASS == "Yes",
                            na.rm = TRUE),
      integer(1)
    )
  )
  f
  # Human-readable subtype names, one per SD1 code 1..41 (kept verbatim
  # from the data dictionary so plot labels are unchanged).
  f[, "Label"] <- c("High Income, expensive child",
                    "Very Important Provincials",
                    "High status seniors",
                    "Affluent senior apartments",
                    "Mixed seniors",
                    "Career and childcare",
                    "Dinki's (double income no kids)",
                    "Middle class families",
                    "Modern, complete families",
                    "Stable family",
                    "Family starters",
                    "Affluent young families",
                    "Young all american family",
                    "Junior cosmopolitan",
                    "Senior cosmopolitans",
                    "Students in apartments",
                    "Fresh masters in the city",
                    "Single youth",
                    "Suburban youth",
                    "Etnically diverse",
                    "Young urban have-nots",
                    "Mixed apartment dwellers",
                    "Young and rising",
                    "Young, low educated",
                    "Young seniors in the city",
                    "Own home elderly",
                    "Seniors in apartments",
                    "Residential elderly",
                    "Porchless seniors: no front yard",
                    "Religious elderly singles",
                    "Low income catholics",
                    "Mixed seniors",
                    "Lower class large families",
                    "Large family, employed child",
                    "Village families",
                    "Couples with teens 'Married with children'",
                    "Mixed small town dwellers",
                    "Traditional families",
                    "Large religous families",
                    "Large family farms",
                    "Mixed rurals")
  # Pie chart of the "Yes" counts per subtype.
  p <- plot_ly(f, labels = f$Label, values = f$Freq, type = 'pie',
               textinfo = 'label+percent') %>%
    layout(title = 'Pie Chart MOSTYPE Customer Subtype Class YES',
           xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
           yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
  p
}
Parmi les individus qui ont répondu OUI, la majorité sont de type “Lower class large” et “Middle class families”
# Main customer type (SD5) among "Yes" respondents.
# Column mapped by name inside aes() — aes(y$SD5) bypasses data masking.
ggplot(data = y, aes(x = SD5)) +
  geom_histogram(breaks = seq(0, 11, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "lightblue", high = "blue") +
  ggtitle("Histrogram of MainType") +
  labs(x = "MainType", y = "Count")
Parmi les individus qui ont répondu OUI, la majorité sont de type “Family with grown-ups”. On remarque aussi les individus de type “driven growers” et “average family”.
# Share of Roman Catholics (SD6) among "Yes" respondents.
# Column mapped by name inside aes() — aes(y$SD6) bypasses data masking.
ggplot(data = y, aes(x = SD6)) +
  geom_histogram(breaks = seq(0, 11, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "lightblue", high = "blue") +
  ggtitle("Histrogram of Roman Catholics") +
  labs(x = "% Roman Catholic", y = "Count")
à peu près 470 personnes catégorisés comme 0-10% Roman Catholic ont répondu oui à l’assurance caravane.
# Contribution to private third-party insurance (PO44) among "Yes"
# respondents. Column mapped by name inside aes() rather than y$PO44.
ggplot(data = y, aes(x = PO44)) +
  geom_histogram(breaks = seq(0, 11, by = 1), aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "yellow", high = "red") +
  ggtitle("Histrogram of Contribution private third party insurance") +
  labs(x = "Contribution private third party insurance", y = "Count")
# Pie chart of the PO44 frequency distribution among "Yes" respondents.
ncountType <- table(y$PO44)
ncountType <- data.frame(ncountType)
DemoYesType <- data.frame(
  group = ncountType$Var1,
  value = ncountType$Freq
)
library(plotly)
# FIX: RGB components must lie in 0-255. The original had out-of-range
# values rgb(145,253,429) and rgb(175,105,877); browsers clamp such
# components to 255, so we write the clamped values explicitly.
# (The unused intermediate `YesType` was also removed.)
colors <- c('rgb(215,94,96)', 'rgb(122,132,133)', 'rgb(145,253,255)',
            'rgb(175,105,255)', 'rgb(115,147,23)')
p <- plot_ly(DemoYesType, labels = ~group, values = ~value, type = 'pie',
             marker = list(colors = colors,
                           line = list(color = '#FFFFFF', width = 1))) %>%
  layout(title = 'Histogram of Contribution private third party insurance')
p
Les individus qui dépensent entre 1 et 99 dollars ont répondu OUI à l’assurance caravane.
# Overlaid percentage histograms of PO47 (car-policy contribution),
# split by the answer to the caravan-insurance offer.
po47_non <- dataAssurance[which((dataAssurance$CLASS=="No")), "PO47"]
po47_oui <- dataAssurance[which((dataAssurance$CLASS=="Yes")), "PO47"]
p <- plot_ly(alpha = 0.6) %>%
  add_histogram(x = ~po47_non, histnorm = "percent", name = "NON") %>%
  add_histogram(x = ~po47_oui, histnorm = "percent", name = "OUI") %>%
  layout(barmode = "overlay", title = "Contribution car policies")
p
50% de ceux qui ont répondu “non” à l’offre n’ont même pas d’assurance voiture alors que 71% de ceux qui ont répondu “oui” dépensent entre 1000 et 4999.
# Overlaid percentage histograms of PO59 (fire-policy contribution),
# split by the answer to the caravan-insurance offer.
po59_non <- dataAssurance[which((dataAssurance$CLASS=="No")), "PO59"]
po59_oui <- dataAssurance[which((dataAssurance$CLASS=="Yes")), "PO59"]
p <- plot_ly(alpha = 0.6) %>%
  add_histogram(x = ~po59_non, histnorm = "percent", name = "NON") %>%
  add_histogram(x = ~po59_oui, histnorm = "percent", name = "OUI") %>%
  layout(barmode = "overlay", title = "Contribution fire policies")
p
46% de ceux qui ont répondu “non” à l’offre n’ont pas d’assurance incendie contre 30.88% de ceux qui ont répondu “oui” à l’offre. 51% de ceux qui ont répondu “oui” à l’offre dépensent entre 100 et 499.
# Train/test split driven by the STATUS column shipped with the dataset.
# Columns 1:86 keep the 85 predictors plus CLASS and drop STATUS itself.
data.train<-dataAssurance[which(dataAssurance$STATUS=="Learning"),1:86]
# alias kept for the chunks below ("apprentissage" = training set)
data_assurance_apprentissage <-data.train
data.test<-dataAssurance[which(dataAssurance$STATUS=="Test"),1:86]
data_assurance_test<-data.test
# Tune randomForest's mtry over 1..20 and record test-set accuracy for each.
# Fixes vs. original:
#  * the formula was `data_assurance_apprentissage$CLASS ~ .`, a fragile form
#    in which `.` is expanded against the data frame while the response is a
#    free-standing vector — use the plain `CLASS ~ .` data-masked form;
#  * preallocate the accuracy vector instead of growing it from `c()`;
#  * drop the dead `i = 1` initialisation (the for-loop binds `i` itself).
a <- numeric(20)
for (i in seq_len(20)) {
  model1 <- randomForest(CLASS ~ ., data = data_assurance_apprentissage,
                         ntree = 500, mtry = i, importance = TRUE)
  predValid <- predict(model1, data_assurance_test, type = "class")
  a[i] <- mean(predValid == data_assurance_test$CLASS)
}
a
## Accuracies printed by the original run, kept for reference:
## [1] 0.94050 0.94050 0.94125 0.94000 0.94075 0.93925 0.93900 0.93725
## [9] 0.93825 0.93725 0.93650 0.93650 0.93575 0.93475 0.93550 0.93575
## [17] 0.93500 0.93600 0.93375 0.93375
plot(1:20, a)
# Variable-importance table for the last forest fitted in the loop above
# (model1, i.e. mtry = 20).
importance(model1)
## No Yes MeanDecreaseAccuracy MeanDecreaseGini
## SD1 19.8344605 -3.04590541 19.4981489 23.304174317
## SD2 2.8486833 4.59084910 4.4592976 2.830382381
## SD3 8.0128570 -5.03688044 7.1490101 5.307305191
## SD4 8.7172665 -1.98928381 8.1762365 7.469725247
## SD5 15.6782300 -4.43285286 15.4696838 12.239412150
## SD6 5.6825151 -2.03257987 5.2050807 6.265044437
## SD7 10.7893591 -2.79127318 9.5197596 12.305806344
## SD8 7.0429738 -1.89583441 6.4457719 9.162264596
## SD9 9.4000987 -0.99670475 9.2013982 12.661998776
## SD10 14.4769665 -2.40597307 14.0897216 8.929162886
## SD11 5.5650693 -1.61678457 5.1407484 5.641102082
## SD12 13.6493147 -6.51159660 12.5816430 7.335753435
## SD13 11.5607834 -4.80177453 10.7410733 8.647531329
## SD14 9.8096761 -4.87543261 8.4279160 11.388545147
## SD15 11.9119167 -6.27512949 10.4090507 11.398553116
## SD16 15.9332043 -5.09389088 15.4646416 11.262688853
## SD17 12.7766811 -1.33296236 12.6769367 12.249461610
## SD18 14.1172380 -3.71316685 14.0160319 11.880037625
## SD19 9.6237108 -2.10056016 9.3749628 9.886999493
## SD20 10.2107362 -2.91412750 9.4986553 6.262687909
## SD21 6.4160962 -3.54783253 5.8778654 3.666077280
## SD22 11.2096715 2.45194089 11.8755571 12.671769255
## SD23 13.5300746 -2.28121920 13.2942848 10.939841078
## SD24 14.8271435 -7.95220751 13.2930413 11.520765695
## SD25 11.9586359 -4.19989110 11.6364017 9.193745149
## SD26 8.6175856 -1.97612966 7.8612876 9.524059829
## SD27 9.8096267 -3.08585706 9.1533055 9.772124318
## SD28 12.7373574 -2.57548045 12.5851383 11.166975661
## SD29 10.1076117 -3.98254417 9.3393665 7.088393401
## SD30 11.9590243 -3.74496901 11.3306811 9.666230197
## SD31 11.0891138 -4.44715764 10.3011470 9.876335313
## SD32 12.7678082 -4.63979465 11.8994717 8.110502942
## SD33 10.7295694 -4.79694887 9.5318815 7.848120110
## SD34 14.0961678 -6.65144178 12.8313473 7.545057365
## SD35 10.5748655 -5.73927954 9.9463611 8.830854216
## SD36 10.6495412 -5.04658978 9.6905358 8.778889692
## SD37 13.4802188 -2.72202497 13.2966507 9.080845295
## SD38 13.0170943 -2.92308075 12.3791859 11.959694137
## SD39 8.0792509 -0.28453395 8.4273180 10.371374038
## SD40 10.9113994 -1.74619653 10.5752480 9.014683956
## SD41 3.7199940 -1.41271957 3.2991917 3.251849107
## SD42 12.0798192 -0.82927127 12.4285802 8.960862302
## SD43 12.1933052 -0.94881969 12.4017325 14.719203654
## PO44 7.1450772 1.84195618 7.7425094 14.359629020
## PO45 -0.1942939 -0.14295183 -0.1991731 1.347412345
## PO46 0.6047811 1.00100150 0.7021377 0.273463079
## PO47 -0.3924323 25.82590851 6.6384782 19.305681255
## PO48 2.5309334 -1.95349355 2.0947393 0.713853332
## PO49 -2.6468618 0.89109601 -2.2433266 4.365414114
## PO50 0.0000000 0.00000000 0.0000000 0.005692308
## PO51 3.6994523 -1.73382770 3.2526683 1.520843376
## PO52 2.6367574 -0.99657627 2.4807179 1.705551206
## PO53 0.0000000 0.00000000 0.0000000 0.017697436
## PO54 14.6818199 -5.05734114 13.4056479 4.447305328
## PO55 -3.5639894 0.55826750 -3.2282308 5.705469523
## PO56 -1.4169837 0.00000000 -1.4170079 0.100756496
## PO57 -0.7591322 0.73178418 -0.4543354 2.473222587
## PO58 5.8666048 5.38685833 7.2377779 1.957793310
## PO59 3.7169034 9.69239190 6.6531811 26.116618773
## PO60 -2.7309091 0.00000000 -2.7316895 0.334687849
## PO61 11.0287648 14.11262369 14.9188857 6.514909861
## PO62 6.1901912 -0.40699833 6.1017685 3.314635100
## PO63 4.5791146 -1.09473931 4.1920485 1.573270369
## PO64 -0.9149208 -0.02432593 -0.8758787 4.479027903
## PO65 7.5756105 0.22886164 7.4995849 10.310920757
## PO66 -1.9649312 0.50343160 -1.8261755 1.113525582
## PO67 1.1423075 -1.41666593 0.9309146 0.280494575
## PO68 -4.7424238 13.31953920 -0.5909349 18.607119527
## PO69 2.2510879 -1.91428393 1.8163889 0.635910198
## PO70 -1.3231355 0.45280012 -1.0314927 4.333924778
## PO71 0.0000000 0.00000000 0.0000000 0.013333333
## PO72 2.9657855 -1.10110210 2.7257511 1.286301926
## PO73 2.4494833 -1.76545909 2.0330419 0.943067799
## PO74 1.4170489 0.00000000 1.4170505 0.035663541
## PO75 11.9860755 -2.36509284 10.8275407 2.970738739
## PO76 -6.1817248 5.31414407 -4.3120843 8.053310645
## PO77 -0.3649460 0.00000000 -0.3665692 0.087747781
## PO78 -1.1993674 0.75416045 -0.8937447 1.474228854
## PO79 3.7639550 4.04152631 4.6580660 1.645438979
## PO80 2.9419581 -0.83247245 2.7447900 8.963574107
## PO81 -1.6777316 0.00000000 -1.6779174 0.230370854
## PO82 11.8089722 15.85863479 16.1689246 5.736106086
## PO83 2.9028249 -0.65494382 2.6982068 5.207567191
## PO84 4.4711537 -2.69305731 3.9260325 1.396539901
## PO85 0.9666416 1.73320163 1.4718062 3.510194181
# Dot plots of MeanDecreaseAccuracy / MeanDecreaseGini for model1.
varImpPlot(model1)
# Refit the forest with the best mtry (3) and evaluate on the test set.
# Same fix as the tuning loop: use the data-masked `CLASS ~ .` instead of
# the fragile `data_assurance_apprentissage$CLASS ~ .` response form.
model2 <- randomForest(CLASS ~ ., data = data_assurance_apprentissage,
                       ntree = 500, mtry = 3, importance = TRUE)
predValid <- predict(model2, data_assurance_test, type = "class")
mean(predValid == data_assurance_test$CLASS)
## Output of the original run:
## [1] 0.94075
table(predictions=predValid,actual=data_assurance_test$CLASS)
## actual
## predictions No Yes
## No 3762 237
## Yes 0 1
# 3-fold cross-validated random forest via caret, tuning mtry over {2,3,4},
# then an ROC curve / AUC from the "Yes" class probabilities (ROCR).
data.train$CLASS <- as.factor(data.train$CLASS)
data.test$CLASS <- as.factor(data.test$CLASS)
set.seed(123)
grid <- expand.grid(mtry=c(2,3,4))
modelKF <- caret::train(CLASS ~.,
data=data.train,
method= "rf",
trControl= trainControl(method = "cv", number = 3, savePredictions = TRUE),
tuneGrid=grid,
preProcess=c('center','scale'))
# class probabilities on the test set; column 2 is P(CLASS = "Yes")
predictionsKF<-predict(modelKF,data.test,type = "prob")
pred <- as.data.frame(predictionsKF)
pred1 <- prediction(pred[,2],data.test$CLASS)
perfrfp=performance(pred1,"tpr", "fpr")
plot(perfrfp,colorize = TRUE)
perf <- performance(pred1, "auc")
perf@y.values[[1]]
## [1] 0.6944606
# Random forest resampled with 10 bootstrap repetitions (caret).
# Note the confusion matrix below: the selected model predicts "No" for
# every test row (majority-class collapse on this imbalanced target).
train_control_bootstrap <- trainControl(method="boot", number=10)
# train the model
modelBoot <- caret::train(CLASS~., data=data_assurance_apprentissage, trControl=train_control_bootstrap, method="rf")
predValid_boot <- predict(modelBoot, data_assurance_test, type = "raw")
table(predictions=predValid_boot,actual=data_assurance_test$CLASS)
## actual
## predictions No Yes
## No 3762 238
## Yes 0 0
# summarize results
print(modelBoot)
## Random Forest
##
## 5822 samples
## 85 predictor
## 2 classes: 'No', 'Yes'
##
## No pre-processing
## Resampling: Bootstrapped (10 reps)
## Summary of sample sizes: 5822, 5822, 5822, 5822, 5822, 5822, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9408737 -0.0003698767
## 43 0.9227213 0.0472691447
## 85 0.9192118 0.0475959873
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# AdaBoost (adabag::boosting), 20 iterations, Breiman's alpha coefficient,
# with bootstrap sampling of the training set at each iteration (boos=TRUE).
adaboost<-boosting(CLASS ~ .,data = data.train, boos=TRUE, mfinal=20,coeflearn='Breiman')
summary(adaboost)
## Length Class Mode
## formula 3 formula call
## trees 20 -none- list
## weights 20 -none- numeric
## votes 11644 -none- numeric
## prob 11644 -none- numeric
## class 5822 -none- character
## importance 85 -none- numeric
## terms 3 terms call
## call 6 -none- call
# Training-error evolution across the 20 boosting iterations.
errorevol(adaboost,data.train)
## $error
## [1] 0.06011680 0.06011680 0.06011680 0.06080385 0.06097561 0.06028856
## [7] 0.05925799 0.06011680 0.05942975 0.05977327 0.05925799 0.06011680
## [13] 0.05891446 0.05960151 0.05908622 0.05925799 0.05960151 0.06046032
## [19] 0.05942975 0.05908622
##
## attr(,"class")
## [1] "errorevol"
# Evaluate the boosted ensemble on the test set, then plot its first tree.
predValid_adab<-predict(adaboost,data.test)
mean(predValid_adab$class == data.test$CLASS)
## [1] 0.93825
table(predictions=predValid_adab$class,actual=data.test$CLASS)
## actual
## predictions No Yes
## No 3742 227
## Yes 20 11
# first weak learner of the ensemble, visualised with rpart.plot
t1<-adaboost$trees[[1]]
rpart.plot(t1, box.palette="RdBu", shadow.col="gray", nn=TRUE,roundint=FALSE)
library(DMwR)
# Balance the training set with SMOTE (600% minority oversampling, k = 5
# nearest neighbours, 100% majority undersampling) and fit the requested
# learner on the balanced data.
# NOTE(review): with `learner = "rpart"` DMwR::SMOTE appears to return the
# fitted rpart model itself (hence the prp/rpart.plot calls below) — confirm
# against the installed DMwR version's documentation.
classTree <- DMwR::SMOTE(CLASS~.,data.train,k = 5,perc.over = 600,perc.under = 100,learner = "rpart")
# predictions are computed here but not used further in this document
tree.prediction<- predict(classTree,data.test, type="class")
prp(classTree)
rpart.plot(classTree, box.palette="RdBu", shadow.col="gray", nn=TRUE,roundint=FALSE)
# Misclassification error of linear discriminant analysis on the test set.
# Fixes vs. original:
#  * `errlin = dim(rep)` initialised errlin to NULL (dim() of a scalar is
#    NULL) — preallocate a numeric vector of length `rep` instead;
#  * the error rate was divided by `neval = n - nt = 25` rather than by the
#    number of test rows, which produced the impossible value -148.6 printed
#    by the original run — divide by nrow(data.test).
# (lda is deterministic, so all `rep` repetitions yield the same error; the
# loop structure is kept so `errlin`/`merrlin` retain their shapes.)
n=85
nt=60
neval=n-nt
rep=100
### LDA
set.seed(123456789)
errlin <- numeric(rep)
for (k in 1:rep) {
  ## linear discriminant analysis
  m1=lda(CLASS~.,data.train)
  v <- predict(m1,data.test)
  tablin=table(data.test$CLASS,v$class)
  errlin[k]=(nrow(data.test)-sum(diag(tablin)))/nrow(data.test)
}
merrlin=mean(errlin)
merrlin
## The original (buggy) run printed: [1] -148.6
library(e1071)
# Radial-kernel C-classification SVM (cost = 10, gamma = 0.1);
# probability = TRUE stores what is needed for later probability estimates.
modelsvm<-svm(CLASS~., data=data.train, method= "C-classification" , kernel="radial" ,cost=10,gamma=0.1,cross=0,fitted=TRUE, probability=TRUE)
modelsvm
##
## Call:
## svm(formula = CLASS ~ ., data = data.train, method = "C-classification",
## kernel = "radial", cost = 10, gamma = 0.1, cross = 0, fitted = TRUE,
## probability = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 10
## gamma: 0.1
##
## Number of Support Vectors: 3733
# NOTE(review): `v` and `m1` come from the LDA loop above, not from the SVM
# fitted just before — this confusion matrix and ROC curve describe the LDA
# model, despite their position in the SVM section.
table(v$class, data.test$CLASS)
##
## No Yes
## No 3731 229
## Yes 31 9
plot(v$x) # scatterplot of the discriminant (LD1) scores
text(v$x,m1$lev,cex=0.7,pos=4,col=c("red","blue")) # add class labels
# ROC / AUC computed from the LD1 scores via ROCR
predi <- prediction(v$x,data.test$CLASS)
perfldap=performance(predi,"tpr", "fpr")
plot(perfldap,colorize = TRUE)
perf <- performance(predi, "auc")
perf@y.values[[1]]
## [1] 0.7246525
# Class predictions from the SVM on the test set.
# FIX: predict.svm has no `type` argument — `type = "prob"` was silently
# swallowed by `...` and class labels were returned anyway. The misleading
# argument is removed (behavior unchanged); real class probabilities would
# require predict(..., probability = TRUE).
presvm = predict(modelsvm, data.test)
table(presvm, data.test$CLASS)
##
## presvm No Yes
## No 3703 225
## Yes 59 13
Superposition des courbes ROC pour évaluer la performance des modèles et les comparer.
# Overlay the random-forest (red) and LDA (blue) ROC curves on one device.
# NOTE(review): ROCR's plot() also accepts `add = TRUE`, which would avoid
# the par(new = TRUE) trick; left as-is to preserve the original rendering.
plot(perfrfp,col="red")
par(new=TRUE)
plot(perfldap,col="blue")
par(new=TRUE)
# presvm holds class labels, not a ROCR performance object, so its curve
# cannot be drawn this way and stays disabled:
#plot(presvm,col="yellow")
# Linear probability model: recode CLASS to 0/1 and regress it on all
# predictors with OLS.
data_assurance_apprentissage$CLASS<-as.numeric(data_assurance_apprentissage$CLASS)-1
data_assurance_test$CLASS<-as.numeric(data_assurance_test$CLASS)-1
# Use the data-masked `CLASS ~ .` form rather than the fragile
# `data_assurance_apprentissage$CLASS ~ .` response expression.
ModelLinear <- lm(CLASS ~ ., data = data_assurance_apprentissage)
#ModelLinear$coefficients
plot(ModelLinear)
## Warning: not plotting observations with leverage one:
## 4034
## Warning: not plotting observations with leverage one:
## 4034
hist(residuals(ModelLinear))
# FIX: the original called predict(ModelLinear, data = data_assurance_test_sc):
# predict.lm has no `data` argument (it must be `newdata`) and
# `data_assurance_test_sc` is never defined anywhere in this document, so the
# call silently returned the in-sample fitted values instead of test scores.
predValidModelLinear <- predict(ModelLinear, newdata = data_assurance_test)
plot(predValidModelLinear)
library(MASS)
# Both-direction stepwise AIC selection starting from the full linear model.
step<-stepAIC(ModelLinear,direction="both",trace = F)
plot(step)
hist(residuals(step))
#summary(step)
#step$coefficients
# second positional argument of predict.lm is `newdata`, so this correctly
# scores the test set
predValidStep=predict(step,data_assurance_test)
plot(predValidStep)
# Logistic regression of the 0/1 response on all predictors.
# NOTE(review): the `df$CLASS ~ .` response form is fragile (prefer the
# data-masked `CLASS ~ .`); the fitted model is left byte-identical here.
# The warning below indicates quasi-separation on some predictors.
ModelLR <- glm(data_assurance_apprentissage$CLASS~.,data=data_assurance_apprentissage, family="binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
ModelLR
##
## Call: glm(formula = data_assurance_apprentissage$CLASS ~ ., family = "binomial",
## data = data_assurance_apprentissage)
##
## Coefficients:
## (Intercept) SD1 SD2 SD3 SD4
## 2.542e+02 6.580e-02 -1.832e-01 -2.696e-02 2.096e-01
## SD5 SD6 SD7 SD8 SD9
## -2.767e-01 -1.142e-01 -1.910e-02 -1.618e-02 -6.817e-02
## SD10 SD11 SD12 SD13 SD14
## 2.310e-01 8.509e-02 1.467e-01 -8.291e-02 -1.154e-01
## SD15 SD16 SD17 SD18 SD19
## -8.140e-02 9.717e-04 -9.077e-02 -1.994e-01 8.883e-02
## SD20 SD21 SD22 SD23 SD24
## 3.918e-02 -1.169e-01 1.353e-01 3.976e-02 9.954e-02
## SD25 SD26 SD27 SD28 SD29
## 2.690e-02 -8.801e-03 1.200e-02 9.016e-02 -2.468e-02
## SD30 SD31 SD32 SD33 SD34
## -1.472e+01 -1.469e+01 1.819e-01 1.507e-01 9.325e-02
## SD35 SD36 SD37 SD38 SD39
## -1.445e+01 -1.451e+01 1.181e-01 1.366e-01 1.009e-01
## SD40 SD41 SD42 SD43 PO44
## 1.144e-01 -1.607e-01 9.214e-02 6.856e-02 5.954e-01
## PO45 PO46 PO47 PO48 PO49
## -2.757e-01 -4.405e-01 2.306e-01 1.215e+01 -8.101e-02
## PO50 PO51 PO52 PO53 PO54
## -2.106e+00 1.014e+00 7.229e-01 -5.525e+00 2.170e-01
## PO55 PO56 PO57 PO58 PO59
## -2.382e-01 -4.523e-01 1.444e+00 8.239e-01 2.401e-01
## PO60 PO61 PO62 PO63 PO64
## -8.658e+00 -1.886e-01 3.664e-01 -1.068e+00 -1.676e-01
## PO65 PO66 PO67 PO68 PO69
## -9.293e-01 4.197e-01 2.762e-01 -3.902e-02 -7.298e+01
## PO70 PO71 PO72 PO73 PO74
## 2.418e-01 -4.490e+00 -1.351e+00 -2.376e+00 -8.749e-01
## PO75 PO76 PO77 PO78 PO79
## -1.060e+00 4.789e-01 3.997e-01 -3.163e+00 -3.212e+00
## PO80 PO81 PO82 PO83 PO84
## -4.118e-01 1.047e+01 2.516e+00 2.318e-01 1.947e+00
## PO85
## 1.078e+00
##
## Degrees of Freedom: 5821 Total (i.e. Null); 5736 Residual
## Null Deviance: 2636
## Residual Deviance: 2243 AIC: 2415
# Standard glm diagnostic plots; the leverage-one warnings mirror those seen
# for the linear model (observation 4034).
plot(ModelLR)
## Warning: not plotting observations with leverage one:
## 4034
## Warning: not plotting observations with leverage one:
## 4034
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
# Predicted probabilities of CLASS = 1 on the test set.
# FIX: the original omitted type = "response", so predict.glm returned
# link-scale values (log-odds) — hence the printed maximum of 2.63, which is
# impossible for a probability.
predValidModelLR = predict(ModelLR, data_assurance_test, type = "response")
max(predValidModelLR)
## The original (link-scale) run printed: [1] 2.631314
plot(predValidModelLR)
# Deep neural-network classifier via the tfestimators package (now archived
# on CRAN; requires a working TensorFlow 1.x installation).
response <- function() "CLASS"
data =dataAssurance
# predictors = every column except the response (STATUS is dropped below)
features <- function() setdiff(names(data), response())
set.seed(123)
# NOTE(review): this split assumes all "Learning" rows precede the "Test"
# rows in the file — confirm the STATUS column is ordered that way.
index = seq(1,max(which(data$STATUS=="Learning")))
data$STATUS<-NULL
datatrain = data[ index, ]
datatest = data[ -index, ]
data_train <- as.data.frame(datatrain)
data_test <- as.data.frame(datatest)
# all predictors are numeric feature columns
feature_columns <- feature_columns(
column_numeric(features())
)
# four hidden layers of 30, 20, 10 and 5 units; binary output
classifier <- dnn_classifier(
feature_columns = feature_columns,
hidden_units = c(30,20,10,5),
n_classes = 2
)
# wraps a data frame into the tfestimators input function
data_input_fn <- function(data) {
input_fn(data, features = features(), response = response())
}
t<-train(classifier, input_fn = data_input_fn(data_train))
plot(t)
predictions <- predict(classifier, input_fn = data_input_fn(data_test))
evaluation <- evaluate(classifier, input_fn = data_input_fn(data_test))
summary(evaluation)
## accuracy accuracy_baseline auc auc_precision_recall
## Min. :0.9405 Min. :0.9405 Min. :0.6803 Min. :0.1257
## 1st Qu.:0.9405 1st Qu.:0.9405 1st Qu.:0.6803 1st Qu.:0.1257
## Median :0.9405 Median :0.9405 Median :0.6803 Median :0.1257
## Mean :0.9405 Mean :0.9405 Mean :0.6803 Mean :0.1257
## 3rd Qu.:0.9405 3rd Qu.:0.9405 3rd Qu.:0.6803 3rd Qu.:0.1257
## Max. :0.9405 Max. :0.9405 Max. :0.6803 Max. :0.1257
## average_loss label/mean loss precision
## Min. :0.2251 Min. :0.0595 Min. :28.14 Min. :0
## 1st Qu.:0.2251 1st Qu.:0.0595 1st Qu.:28.14 1st Qu.:0
## Median :0.2251 Median :0.0595 Median :28.14 Median :0
## Mean :0.2251 Mean :0.0595 Mean :28.14 Mean :0
## 3rd Qu.:0.2251 3rd Qu.:0.0595 3rd Qu.:28.14 3rd Qu.:0
## Max. :0.2251 Max. :0.0595 Max. :28.14 Max. :0
## prediction/mean recall global_step
## Min. :0.03245 Min. :0 Min. :46
## 1st Qu.:0.03245 1st Qu.:0 1st Qu.:46
## Median :0.03245 Median :0 Median :46
## Mean :0.03245 Mean :0 Mean :46
## 3rd Qu.:0.03245 3rd Qu.:0 3rd Qu.:46
## Max. :0.03245 Max. :0 Max. :46
# Precision and recall are both 0 in the evaluation summary above: like the
# other models, the network never predicts the minority "Yes" class.
plot(evaluation)